# -*- coding: UTF-8 -*-
import scrapy
import json
import messAround.util.help as util


# Zhihu hot list (billboard)
# Page: https://www.zhihu.com/billboard
# Run:  scrapy crawl zhihu_topic
class ZhihuTopSpider(scrapy.Spider):
    """Spider for the Zhihu billboard (hot list) page.

    Requests https://www.zhihu.com/billboard with browser-like headers and
    yields one dict per hot-list entry found in the JSON state embedded in
    the page's ``js-initialData`` element.
    """

    # Numeric source id attached to every yielded item.
    # NOTE(review): presumably identifies Zhihu among several crawled
    # sources; the mapping is not defined in this file -- confirm.
    source = 1

    name = 'zhihu_topic'

    allowed_domains = ['www.zhihu.com']

    start_urls = ['https://www.zhihu.com/billboard']

    # Browser-like request headers.
    #
    # Deliberately NOT included:
    # * HTTP/2 pseudo-headers (':authority', ':method', ':path', ':scheme'):
    #   they are part of HTTP/2 framing, are not valid HTTP/1.1 field names,
    #   and must never be set by application code.
    # * 'Host', 'Connection', 'TE': derived from the URL / managed by the
    #   HTTP client itself.
    # * 'accept-encoding': left to Scrapy's HttpCompressionMiddleware so that
    #   only encodings which can actually be decoded are advertised
    #   (hard-coding 'br' without a Brotli library leaves the body undecoded).
    headers = {
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'accept-language': 'zh-CN,zh;q=0.9',
        'cache-control': 'no-cache',
        'pragma': 'no-cache',
        # Each brand in this header is a double-quoted string, including the
        # first and the last one.
        'sec-ch-ua': '" Not;A Brand";v="99", "Google Chrome";v="91", "Chromium";v="91"',
        'sec-ch-ua-mobile': '?0',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'none',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    }

    def start_requests(self):
        """Yield one request per entry in ``start_urls`` using ``headers``."""
        for url in self.start_urls:
            yield scrapy.Request(
                url=url,
                callback=self.parse,
                headers=self.headers,
            )

    def parse(self, response):
        """Extract hot-list entries from the billboard page.

        Reads the text of the element with id ``js-initialData``, decodes it
        as JSON and walks ``initialState.topstory.hotList``.

        NOTE(review): the JSON paths used here are taken from the extraction
        code that was previously commented out in this method; verify them
        against a live response.

        Yields:
            dict: ``source``, ``no`` (1-based rank), ``title``, ``cover``,
            ``link``, ``info`` and ``hot_index``. A field whose path is
            missing from the payload is yielded as ``None``.
        """
        raw_state = response.xpath('//*[@id="js-initialData"]/text()').get()
        if not raw_state:
            # Nothing to parse: the page layout changed or the request was
            # answered with something other than the billboard page.
            self.logger.warning(
                'js-initialData not found in %s (status %s)',
                response.url, response.status,
            )
            return

        try:
            state = json.loads(raw_state)
            hot_list = state['initialState']['topstory']['hotList']
        except (ValueError, KeyError, TypeError) as exc:
            self.logger.warning(
                'Unexpected js-initialData payload in %s: %r',
                response.url, exc,
            )
            return

        for rank, entry in enumerate(hot_list, start=1):
            target = entry.get('target') or {}
            yield {
                'source': self.source,
                'no': rank,
                'title': self._area_value(target, 'titleArea', 'text'),
                'cover': self._area_value(target, 'imageArea', 'url'),
                'link': self._area_value(target, 'link', 'url'),
                'info': self._area_value(target, 'excerptArea', 'text'),
                'hot_index': self._area_value(target, 'metricsArea', 'text'),
            }

    @staticmethod
    def _area_value(target, area, key):
        """Return ``target[area][key]``, or ``None`` if either level is absent."""
        section = target.get(area)
        if not isinstance(section, dict):
            return None
        return section.get(key)
